package com.ml4ai.spark.boot

import java.io.InputStream
import java.util

import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.SparkSession

import scala.collection.JavaConversions._

object Boot {

  /**
   * Spark driver entry point.
   *
   * Recursively scans the default Hadoop filesystem from "/", collects the
   * paths of all files ending in ".txt", distributes those paths as an RDD,
   * reads each file's contents on the executors via [[Root.readAsText]],
   * and prints every line.
   *
   * Note: `foreach(print)` runs on the executors, so output appears in
   * executor logs (or on this console only in local mode).
   */
  def main(args: Array[String]): Unit = {
    val session = SparkSession.builder().getOrCreate()
    val sc = session.sparkContext

    val configuration = new Configuration
    val fs = FileSystem.get(configuration)

    // Drain Hadoop's RemoteIterator into a Scala List of ".txt" file paths.
    // An Iterator pipeline avoids the deprecated JavaConversions implicit
    // and the intermediate java.util.ArrayList the original code used.
    val remote = fs.listFiles(new Path("/"), true) // true = recursive
    val txtPaths: List[String] = Iterator
      .continually(remote)
      .takeWhile(_.hasNext)
      .map(_.next().getPath)
      .filter(_.getName.endsWith(".txt"))
      .map(_.toUri.getRawPath)
      .toList

    // One task per file path; each executor re-reads the file from HDFS.
    val filePathsRDD = sc.parallelize(txtPaths)
    val lines = filePathsRDD.flatMap { filePath =>
      Root.readAsText(filePath).split("\\n")
    }
    lines.foreach(print)
  }
}

object Root {

  // Filesystem handle created lazily per JVM (driver and each executor get
  // their own on first use, after the object is loaded there).
  // NOTE(review): kept as a public `var` for source/binary compatibility with
  // any external callers; prefer `lazy val` if nothing reassigns it.
  var fs: FileSystem = FileSystem.get(new Configuration)

  /**
   * Reads the whole file at `filePath` from the shared [[fs]] and returns
   * its contents decoded as UTF-8.
   *
   * The original implementation leaked the stream (IOUtils.toByteArray does
   * not close its argument) and decoded with the platform default charset;
   * this version closes the stream in `finally` and pins UTF-8.
   *
   * @param filePath absolute path of the file on the Hadoop filesystem
   * @return file contents as a UTF-8 string
   */
  def readAsText(filePath: String): String = {
    val inputStream: InputStream = fs.open(new Path(filePath))
    try {
      val bs: Array[Byte] = IOUtils.toByteArray(inputStream)
      new String(bs, java.nio.charset.StandardCharsets.UTF_8)
    } finally {
      inputStream.close()
    }
  }

}
